_UNDOCUMENTED CORNER_ edited by Andrew Schulman written by Pete Davis [LISTING ONE] /* WHSTRUCT.H--Windows Help File Internal Records--Pete Davis and Ron Burk, June 1993. See "Undocumented Corner," DDJ, September 1993 */ typedef unsigned long DWORD; typedef unsigned int WORD; typedef unsigned char BYTE; #define HELP_MAGIC 0x00035F3FL /* Help file Header record */ typedef struct HELPHEADER { DWORD MagicNumber; /* 0x00035F3F */ long WHIFS; /* File offset of WHIFS header */ long Negative1; long FileSize; /* Size of entire .HLP File */ } HELPHEADER; /* File Header for WHIFS files */ typedef struct FILEHEADER { long FilePlusHeader; /* File size including this header */ long FileSize; /* File size not including header */ char TermNull; } FILEHEADER; /* Help Directory BTREE */ typedef struct WHIFSBTREEHEADER { char Magic[18]; /* Not exactly magic for some .MVB files */ char Garbage[13]; int MustBeZero; /* Probably shows up when Help > ~40 megs */ int NSplits; /* Number of page split Btree has suffered */ int RootPage; /* Page # of root page */ int MustBeNegOne; /* Probably shows up when B-Tree is HUGE!! 
*/ int TotalPages; /* total # to 2Kb pages in Btree */ int NLevels; /* Number of levels in this Btree */ DWORD TotalWHIFSEntries; } WHIFSBTREEHEADER; /* Modified B-Tree Node header to handle a pointer to the page */ typedef struct BTREENODEHEADER { WORD Signature; /* Signature word */ int NEntries; /* Number of entries */ int PreviousPage; /* Index of Previous Page */ int NextPage; /* Index of Next Page */ char *BTData; /* Pointer to B-Tree's data */ } BTREENODEHEADER; /* Modified B-Tree Index header to handle a pointer to the page */ typedef struct BTREEINDEXHEADER { WORD Signature; /* Signature word */ int NEntries; /* Number of entries in node */ char *IdxData; } BTREEINDEXHEADER; /* Phrase header for uncompressed |Phrases file */ typedef struct PHRASEHDR { int NumPhrases; /* Number of phrases in table */ WORD OneHundred; /* 0x0100 */ } PHRASEHDR; /* Phrase header for compressed |Phrases file */ typedef struct ALTPHRASEHDR { int NumPhrases; /* Number of phrases in table */ WORD OneHundred; /* 0x0100 */ long PhrasesSize; /* Amount of space uncompressed phrases requires */ } ALTPHRASEHDR; /* Flags for |SYSTEM header Flags field below: Unfortunately, none of these flags are particularly solid. The 0x0004 works MOST of the time. Another flag, 0x0008, appears both in Win32 .HLP files, and in files with Phrase compression but without LZ77 compression. 
*/ #define NO_COMPRESSION_310 0x0000 #define COMPRESSION_310 0x0004 #define SYSFLAG_300 0x000A /* Header for |SYSTEM file */ typedef struct SYSTEMHEADER { BYTE Magic; /* 0x6C */ BYTE Version; /* Version # */ BYTE Revision; /* Revision code */ BYTE Always0; /* Unknown */ WORD Always1; /* Always 0x0001 */ DWORD GenDate; /* Date/Time that the help file was generated */ WORD Flags; /* Values seen: 0x0000 0x0004, 0x0008, 0x000A */ } SYSTEMHEADER; /* Types for SYSTEMREC RecordType below: note that other record types, such as 0x0A, 0x0B, 0x0C, 0x0D, shown up in the large .MVB files used by the MSDN CD-ROM and Cinemania products. */ #define HPJ_TITLE 0x0001 /* Title from .HPJ file */ #define HPJ_COPYRIGHT 0x0002 /* Copyright notice from .HPJ file */ #define HPJ_CONTENTS 0x0003 /* Contents= from .HPJ */ #define MACRO_DATA 0x0004 /* RData = 4 nulls if no macros */ #define ICON_DATA 0x0005 /* Data for Icon */ #define HPJ_SECWINDOWS 0x0006 /* Secondary window info in .HPJ */ #define HPJ_CITATION 0x0008 /* Citation= under [OPTIONS] */ /* Secondary Window Record following type 0x0006 System Record */ typedef struct SECWINDOW { WORD Flags; /* Flags (See Below) */ BYTE Type[10]; /* Type of window */ BYTE Name[9]; /* Window name */ BYTE Caption[51]; /* Caption for window */ WORD X; /* X coordinate to start at */ WORD Y; /* Y coordinate to start at */ WORD Width; /* Width to create for */ WORD Height; /* Height to create for */ WORD Maximize; /* Maximize flag */ BYTE Rgb[3]; /* RGB for background */ BYTE Unknown1; /* No known use */ BYTE RgbNsr[3]; /* RGB for non scrollable region */ BYTE Unknown2; /* No known use */ } SECWINDOW; /* Values for Secondary Window Flags */ #define WSYSFLAG_TYPE 0x0001 /* Type is valid */ #define WSYSFLAG_NAME 0x0002 /* Name is valid */ #define WSYSFLAG_CAPTION 0x0004 /* Ccaption is valid */ #define WSYSFLAG_X 0x0008 /* X is valid */ #define WSYSFLAG_Y 0x0010 /* Y is valid */ #define WSYSFLAG_WIDTH 0x0020 /* Width is valid */ #define WSYSFLAG_HEIGHT 
0x0040 /* Height is valid */ #define WSYSFLAG_MAXIMIZE 0x0080 /* Maximize is valid */ #define WSYSFLAG_RGB 0x0100 /* Rgb is valid */ #define WSYSFLAG_RGBNSR 0x0200 /* RgbNsr is valid */ #define WSYSFLAG_TOP 0x0400 /* On top was set in HPJ file */ /* Help Compiler 3.1 System record. Multiple records possible */ typedef struct SYSTEMREC { WORD RecordType; /* Type of Data in record */ WORD DataSize; /* Size of RData */ char *RData; /* Raw data (Icon, title, etc) */ } SYSTEMREC; /* Header for |TOMAP file */ typedef struct TOMAPHEADER { long IndexTopic; /* Index topic for help file */ long Reserved[15]; int ToMapLen; /* Number of topic pointers */ long *TopicPtr; /* Pointer to all the topics */ } TOMAPHEADER; [LISTING TWO] /* HELPDIR.C -- List all internal files with a Windows .HLP file. WHIFS = Windows Help Internal File System -- Pete Davis, June 1993 bcc helpdir.c See "Undocumented Corner," DDJ, September 1993 */ #pragma pack(1) #include #include #include #include #include "whstruct.h" #define PAGE_SIZE 1024L /* 1k pages -- must be long! 
*/ void fail(const char *s) { puts(s); exit(1); } int main(int argc, char *argv[]) { HELPHEADER HelpHdr; WHIFSBTREEHEADER WHIFSHdr; BTREENODEHEADER WHIFSNode; int file, aPage, c; long WHIFSStart, FileOffset; FILE *HelpFile; if ((HelpFile=fopen(argv[1], "rb")) == NULL) fail("can't open file"); /* Get Help header, go to WHIFS and get WHIFS Header */ fread(&HelpHdr, sizeof(HelpHdr), 1, HelpFile); if (HelpHdr.MagicNumber != HELP_MAGIC) fail("not a Windows help file"); fseek(HelpFile, HelpHdr.WHIFS, SEEK_SET); fread(&WHIFSHdr, sizeof(WHIFSHdr), 1, HelpFile); /* WHIFS starts after the WHIFSHdr */ WHIFSStart = HelpHdr.WHIFS + sizeof(WHIFSHdr); file=1; /* Goto WHIFS Root */ fseek(HelpFile, WHIFSStart + (PAGE_SIZE * WHIFSHdr.RootPage), SEEK_SET); /* Find the first leaf node */ while (file < WHIFSHdr.NLevels) { /* if it's not a leaf, we don't need last 2 fields */ fread(&WHIFSNode, 4, 1, HelpFile); /* Find page pointer to first node in index */ fread(&aPage, sizeof(int), 1, HelpFile); fseek(HelpFile, WHIFSStart + (PAGE_SIZE * aPage), SEEK_SET); file++; } #ifdef DO_MACROS { extern void do_macros(FILE *HelpFile, long WHIFSStart); do_macros(HelpFile, WHIFSStart); } #else /* Go through linked list of leaf nodes */ for (;;) { if (! fread(&WHIFSNode, sizeof(WHIFSNode)-2, 1, HelpFile)) break; /* List all entries in node */ for (file = 1; file <= WHIFSNode.NEntries; file ++) { while (c = fgetc(HelpFile)) putchar(c); fread(&FileOffset, sizeof(FileOffset), 1, HelpFile); printf(" \t0x%08lX\n", FileOffset); } if (WHIFSNode.NextPage == -1) break; else fseek(HelpFile,WHIFSStart+(WHIFSNode.NextPage*PAGE_SIZE),SEEK_SET); } #endif return 1; } [LISTING THREE] /* WHMACROS.C -- Get macros from a .HLP file. 
Used by HELPDIR.C if #define DO_MACROS -- Pete Davis and Andrew Schulman, bcc -DDO_MACROS whmacros.c helpdir.c See "Undocumented Corner," DDJ, September 1993 */ #pragma pack(1) #include #include #include #include #include "whstruct.h" extern void fail(const char *s); #define PAGE_SIZE 1024L /* 1k pages -- must be long! */ void do_macros(FILE *HelpFile, long WHIFSStart) { BTREENODEHEADER WHIFSNode; SYSTEMHEADER SystemHdr; SYSTEMREC SystemRec; FILEHEADER FileHdr; long SystemOffset=0, FileOffset, FileStart; char filename[20], *data; int *Offsets; int c, i, file, txt; /* Find the System file. */ do { fread(&WHIFSNode, sizeof(WHIFSNode) - 2, 1, HelpFile); /* Search all entries in node */ for (file = 1; file <= WHIFSNode.NEntries; file ++) { i = 0; while ( c = fgetc(HelpFile) ) filename[i++]=c; filename[i] = 0; fread(&FileOffset, sizeof(FileOffset), 1, HelpFile); if (strcmp(filename, "|SYSTEM") == 0) { SystemOffset = FileOffset; break; } } if (WHIFSNode.NextPage != -1) fseek(HelpFile, WHIFSStart + (WHIFSNode.NextPage * PAGE_SIZE), SEEK_SET); } while (WHIFSNode.NextPage != -1); if (! SystemOffset) fail("Can't locate |SYSTEM file"); /* Get System header */ fseek(HelpFile, SystemOffset, SEEK_SET); fread(&FileHdr, sizeof(FileHdr), 1, HelpFile); fread(&SystemHdr, sizeof(SystemHdr), 1, HelpFile); FileStart = SystemOffset + sizeof(FileHdr) + sizeof(SystemHdr); FileOffset = 0; while (FileOffset < FileHdr.FileSize) { fseek(HelpFile, FileStart + FileOffset, SEEK_SET); fread(&SystemRec, sizeof(SystemRec)-1, 1, HelpFile); FileOffset += (sizeof(SystemRec) + SystemRec.DataSize - 1); if (SystemRec.RecordType == MACRO_DATA) { if (! (data = (char *) malloc(SystemRec.DataSize+1))) fail("insufficient memory"); fread(data, SystemRec.DataSize, 1, HelpFile); data[SystemRec.DataSize] = '\0'; printf("%s\n\n", data); free(data); } } } Figure 1: Annotated hex dump of portions of a .HLP file. (a) All .HLP files start with a HELPHEADER. The first long is the .HLP magic number (0x035F3F). 
The next long is the file offset of the WHIFS header (in Figure 1(b), that's 0x041F); (b) the WHIFS starts off with a WHIFSBTREEHEADER, immediately followed by the WHIFS directory, which contains null-terminated file names followed by the individual WHIFS file's offset within the larger .HLP file. Here, bag.ini is at offset 0x10, |CONTEXT is at 0x0362B3, and |CTXOMAP is at 0x032B02; (c) each internal file begins with a FILEHEADER structure, which specifies the file's size both with and without the header, followed by a 0. Here, bag.ini is 0x040F bytes with the header, and 0x0406 without. The file data itself (evidently, some kind of initialization file) starts immediately after the header. (a) D:\MIPS>dump msmail32.hlp -bytes 8 00000000 | 3F 5F 03 00 1F 04 00 00 | ?_...... (b) D:\MIPS>dump msmail32.hlp -offset 0x041f 0000041f | 2F 04 00 00 26 04 00 00 04 3B 29 02 04 00 04 7A | /...&....;)....z 0000042f | 34 00 00 43 3A 5C 7E 68 63 35 00 09 02 62 6D 00 | 4..C:\~hc5...bm. 0000043f | 00 00 00 00 00 FF FF 01 00 01 00 1E 00 00 00 C1 | ................ 0000044f | 02 1E 00 FF FF FF FF 62 61 67 2E 69 6E 69 00 10 | .......bag.ini.. 0000045f | 00 00 00 7C 43 4F 4E 54 45 58 54 00 B3 62 03 00 | ...|CONTEXT..b.. 0000046f | 7C 43 54 58 4F 4D 41 50 00 02 2B 03 00 7C 46 4F | |CTXOMAP..+..|FO ; ... etc. ... (c) D:\MIPS>dump d:\mips\msmail32.hlp -offset 0x10 00000010 | 0F 04 00 00 06 04 00 00 00 0D 0A 5B 62 61 67 2E | ...........[bag. 00000020 | 69 6E 69 5D 0D 0A 67 72 6F 75 70 63 6F 75 6E 74 | ini]..groupcount 00000030 | 3D 31 34 0D 0A 67 72 6F 75 70 31 3D 42 61 63 6B | =14..group1=Back 00000040 | 75 70 0D 0A 67 72 6F 75 70 32 3D 43 6C 69 70 62 | up..group2=Clipb ; ... etc. ... Figure 2: HELPDIR output for the .HLP file hex dumped in Figure 1. 
D:\MIPS>c:\ddj\helpdir msmail32.hlp bag.ini 0x00000010 |CONTEXT 0x000362B3 |CTXOMAP 0x00032B02 |FONT 0x000327D2 |KWBTREE 0x00033255 |KWDATA 0x00032ED5 |KWMAP 0x0003323E |SYSTEM 0x0000084E |TOPIC 0x00000A53 |TTLBTREE 0x00034A84 |bm0 0x00037AE2 ; ... etc. ... Figure 3: Selected macros in Microsoft Cinemania and the MSDN CD-ROM, as displayed by WHMACROS. C:\DDJ>dir d:\content\*.mvb CINMANIA MVB 139104719 08-18-92 12:00a C:\DDJ>whmacros d:\content\cinmania.mvb RegisterRoutine("ftui","InitRoutines","SU") RegisterRoutine("ftui","ExecFullTextSearch","USSS") ; ... InitRoutines(qchPath,1) ; ... CreateButton("ftSearch", "&Search", \ "ExecFullTextSearch(hwndApp, qchPath, `', `')") ; ... C:\DDJ>dir d:\*.mvb MSDNCD MVB 270353088 04-05-93 5:58p C:\DDJ>whmacros d:\msdncd.mvb RegisterRoutine("msdncd", "Navigator", "USS") Navigator(hwndApp, "Load", qchPath) ; ... CreateButton("btn_prv","<>","Navigator(hwndApp,\"Next\",\"\")") Table 1: WinHelp internal files Function Description bmx Bitmap files, numbered (bm0, bm24, bm12, and so on); these names do not start with a | |CONTEXT Context topic table |CTXOMAP Context mapping to topics |FONT Fonts available to help file |KWBTREE Keyword B-tree file |KWDATA Keyword mappings to topic file |KWMAP Map into the KWBTREE for quick access |Phrases A list of phrases used for compression of the |TOPIC file |SYSTEM Contains mostly information from .HPJ file |TOMAP List of pointers to topics |TOPIC Contains the actual help text (usually compressed) |TTLBTREE Topic titles B-tree baggage Appears under the filename exactly as specified in the help project